# load libraries
library(foreign)
library(readxl)
library(tidyverse)
library(ggplot2)
library(forecast)
library(Hmisc)
library(plm)
library(lmtest)
library(readstata13)
library(xlsx)
library(stargazer)


# ------- Read data
# Crop diversification
d0 <- read_excel("Crop_Diversification.xlsx")

# Main data set  
d1_1 <- read_excel("all_agri_data_CPI_2004_05.xlsx")

# Merge the two data sets
d1 <- left_join(d1_1, d0, by = c("year"="year", "state"="state"))


# ------- Create variables
d2 <- d1 %>%
  dplyr::mutate(
    # Procurement dummy
    pcrmt = ifelse(
      (state=="Punjab"|state=="Haryana"|
         state=="Uttar Pradesh United"|
         state=="Andhra Pradesh"|
         state=="Madhya Pradesh United"|
         state=="Tamil Nadu"|state=="Odisha"|
         state=="West Bengal"), 1,0),
    # Real Agri value added, rupees crores
    agvad_real = agrvad0405_crore/real_deflator,
    # Real Total wage bill (measure 1), rupees crores
    wbill1_real = total_wage_bill_crore_united/real_deflator,
    # Real Total farm income (measure 1), rupees crore
    finc1_real = real_farm_1_crore,
    # Real Total wage bill (measure 2), rupees crore
    wbill2_real = wage_only_crore_united/real_deflator,
    # Real Total farm income (measure 2), rupees crore
    finc2_real = real_farm_2_crore,
    # Total agri workers
    agwrk = no_agri_worker_united,
    # Total cultivarors
    agcult = cultivators_united,
    
    # Real agri value added per agri popn (agri wrk + cult), rupees
    agvad_real_p = (agvad_real*10^7)/(agwrk + agcult),
    
    # Real Wage bill per agri worker (measure 1), rupees
    agwage1_real = (wbill1_real*10^7)/agwrk,
    # Real Wage bill per agri worker (measure 2), rupees
    agwage2_real = (wbill2_real*10^7)/agwrk,
    # Real farm income per cultivaror (measure 1), rupees
    agcultinc1_real = (finc1_real*10^7)/agcult,
    # Real farm income per cultivaror (measure 2), rupees
    agcultinc2_real = (finc2_real*10^7)/agcult,
    
    # Nominal Wage bill per agri worker (measure 1), rupees
    agwage1_nom = (total_wage_bill_crore_united*10^7)/agwrk,
    # Nominal Wage bill per agri worker (measure 2), rupees
    agwage2_nom = (wage_only_crore_united*10^7)/agwrk,
    # Nominal farm income per cultivaror (measure 1), rupees
    agcultinc1_nom = (real_farm_1_crore*real_deflator*10^7)/agcult,
    # Nominal farm income per cultivaror (measure 2), rupees
    agcultinc2_nom = (real_farm_1_crore*real_deflator*10^7)/agcult,
    # Total nominal agri income, rupees crores 
    tot_agcultinc1_nom = (real_farm_1_crore*real_deflator)
  ) %>%
  as.data.frame()



# ----------------- RESULTS ------------------------------ #

# --------- Table 2
# Data for Table
d2_tbl1 <- d2 %>%
  dplyr::filter(
    year==1988|year==1994|year==2000|year==2005|year==2012
  ) %>%
  dplyr::select(state,year,agcultinc1_nom)%>%
  pivot_wider(c(state,year),
              names_from = year,
              names_prefix = "yr",
              values_from = agcultinc1_nom) %>%
  arrange(-yr2012)%>%
  as.data.frame()

# Save
write.csv(d2_tbl1, file = "Agri-Income-Nominal.csv")



# --------- Table 3

# Data for Table
d2_tbl2 <- d2 %>%
  dplyr::filter(
    year==1988|year==1994|year==2000|year==2005|year==2012
  ) %>%
  dplyr::select(state,year,agcultinc1_real)%>%
  pivot_wider(c(state,year),
              names_from = year,
              names_prefix = "yr",
              values_from = agcultinc1_real) %>%
  arrange(-yr2012)%>%
  as.data.frame()

# Save
write.csv(d2_tbl2, file = "Agri-Income-Real.csv")


# --------- Table 4

# Data for Table
d2_tbl3 <- d2 %>%
  dplyr::filter(
    year==1988|year==1994|year==2000|year==2005|year==2012
  ) %>%
  dplyr::select(state,year,agcultinc1_real)%>%
  group_by(state) %>%
  dplyr::mutate(
    # Annual average growth rate
    aginc1_g = 100*((agcultinc1_real/dplyr::lag(agcultinc1_real,1))^(1/(year-dplyr::lag(year)))-1)
  ) %>%
  dplyr::select(-agcultinc1_real)%>%
  dplyr::filter(year!=1988) %>%
  pivot_wider(c(state,year),
              names_from = year,
              names_prefix = "yr",
              values_from = aginc1_g) %>%
  arrange(-yr2012)%>%
  as.data.frame()

# Save
write.csv(d2_tbl3, file = "Agri-Income-Real-Growth.csv")




# --------- Table 6
# Ranking of states; see results on screen

# Ranking by average agri income per cult
d2_rnk1 <- d2 %>%
  dplyr::select(
    year, state, agcultinc1_real
  ) %>%
  dplyr::filter(year==1988) %>%
  arrange(agcultinc1_real, na.rm=FALSE) %>%
  dplyr::select(-c(year,agcultinc1_real)) %>%
  dplyr::rename(y1988 = state)%>%
  as.data.frame()

d2_rnk2 <- d2 %>%
  dplyr::select(
    year, state, agcultinc1_real
  ) %>%
  dplyr::filter(year==1994) %>%
  arrange(agcultinc1_real) %>%
  dplyr::select(-c(year,agcultinc1_real)) %>%
  dplyr::rename(y1994 = state)%>%
  as.data.frame()


d2_rnk3 <- d2 %>%
  dplyr::select(
    year, state, agcultinc1_real
  ) %>%
  dplyr::filter(year==2000) %>%
  arrange(agcultinc1_real) %>%
  dplyr::select(-c(year,agcultinc1_real)) %>%
  dplyr::rename(y2000 = state)%>%
  as.data.frame()

d2_rnk4 <- d2 %>%
  dplyr::select(
    year, state, agcultinc1_real
  ) %>%
  dplyr::filter(year==2005) %>%
  arrange(agcultinc1_real) %>%
  dplyr::select(-c(year,agcultinc1_real)) %>%
  dplyr::rename(y2005 = state)%>%
  as.data.frame()


d2_rnk5 <- d2 %>%
  dplyr::select(
    year, state, agcultinc1_real
  ) %>%
  dplyr::filter(year==2012) %>%
  arrange(agcultinc1_real) %>%
  dplyr::select(-c(year,agcultinc1_real)) %>%
  dplyr::rename(y2012 = state)%>%
  as.data.frame()


# ---------------------------------------------------------- #
# ----------------- Panel data regressions ----------------- #

# ------- Create panel data set

# Read BBS data set
bbs <- readstata13::read.dta13("BBS-Data.dta")
# Merge
d2_1 <- left_join(d2, bbs, by=c("scode" = "st_code", "year" = "year")) %>%
  # Drop obs with negative farm income (Tripura in 2000)
  dplyr::filter(agcultinc1_real>0) %>%
  group_by(state) %>%
  dplyr::mutate(
    # Annual aveerage growth rate
    aginc1_g = ((agcultinc1_real/dplyr::lag(agcultinc1_real,1))^(1/(year-dplyr::lag(year)))-1)
  ) %>%
  mutate(
    # Log agri income, 1 lag
    laginc1_l1 = dplyr::lag(log(agcultinc1_real))
  ) %>%
  as.data.frame()

# Set as panel
d2_new <- pdata.frame(d2_1, index = c("state","year"))
pdim(d2_new)


# --------- Table 5

# Is there catch up? 

# Unconditional convergence
res_cup1 <- plm(
  aginc1_g ~ laginc1_l1, 
  model = "pooling", data = d2_new)

r1 <- coeftest(res_cup1, function(x) vcovHC(x, type = 'sss'))
r1_o <- nobs(res_cup1)

# Unconditional convergence
res_cup2 <- plm(
  aginc1_g ~ laginc1_l1, 
  model = "within", data = d2_new)

r2 <- coeftest(res_cup2, function(x) vcovHC(x, type = 'sss'))
r2_o <- nobs(res_cup2)

# Conditional convergence
res_cup3 <- plm(
  aginc1_g ~ laginc1_l1 + factor(year), 
  model = "within", data = d2_new)

r3 <- coeftest(res_cup3, function(x) vcovHC(x, type = 'sss'))
r3_o <- nobs(res_cup3)

# Additional rows to add to results
reg_obs <- c("Observations", r1_o,r2_o,r3_o)
reg_fes <- c("State FE", "N","Y", "Y")
reg_fet <- c("Year FE", "N","N", "Y")


# See results on screen
stargazer(
  r1, r2, r3,
  keep = "laginc1_l1",
  type = "text", style = "qje",
  add.lines = list(reg_fes,reg_fet,
                   reg_obs)
)


# -------- Crop diversification and Income 
# Reported in footnote 10 of the paper
summary(
  plm(I(agcultinc1_real/1000) ~ HD + factor(year), 
      data = d2_new, model = "pooling")
)


# --------- Table 7

# Has reforms increased income?

# State and year fixed effects
res2 <- plm(I(agcultinc1_real/1000) ~ mktrfm + factor(year), 
            data = d2_new, model = "within")

res2_c <- coeftest(res2, function(x) vcovHC(x, type = 'sss'))
res2_o <- nobs(res2)

# Add log PCNSDP
res3 <- plm(I(agcultinc1_real/1000) ~ mktrfm + lnsdp + factor(year), 
            data = d2_new, model = "within")

res3_c <- coeftest(res3, function(x) vcovHC(x, type = 'sss'))
res3_o <- nobs(res3)

# Add tax and nontax revenue
res4 <- plm(I(agcultinc1_real/1000) ~ mktrfm + lnsdp + factor(year) +
              otrev_sh + ontrev_sh, 
            data = d2_new, model = "within")

res4_c <- coeftest(res4, function(x) vcovHC(x, type = 'sss'))
res4_o <- nobs(res4)

# Add HD (crop diversification index)
res5 <- plm(I(agcultinc1_real/1000) ~ mktrfm + lnsdp + factor(year) +
              otrev_sh + ontrev_sh + HD, 
            data = d2_new, model = "within")

res5_c <- coeftest(res5, function(x) vcovHC(x, type = 'sss'))
res5_o <- nobs(res5)

# Additional rows to add to results
reg_obs <- c("Observations", res2_o,res3_o,
             res4_o,res5_o)
reg_fes <- c("State FE", "Y", "Y", "Y","Y")
reg_fet <- c("Year FE", "Y", "Y", "Y","Y")
reg_nsdp <- c("Log PCNSDP", "N", "Y", "Y","Y")
reg_tax <- c("Tax + NonTax Sh", "N","N", "Y","Y")
reg_HD <- c("Tax + NonTax Sh", "N","N", "N","Y")


# See results on screen
stargazer(
  res2_c, res3_c, res4_c, res5_c,
  keep = c("mktrfm"),
  covariate.labels = c("TREAT x AFTER"),
  type = "text", style = "qje",
  add.lines = list(reg_fes,reg_fet,reg_nsdp,
                   reg_tax,reg_HD,reg_obs)
)


# --------- Table 8
# Test of parallel trends 
# Create pre-reform data set
d2_new1 <- d2_new %>%
  dplyr::mutate(YEAR = as.numeric(as.character(year))) %>%
  dplyr::filter(YEAR<=2000) %>%
  as.data.frame()

# Set as panel data set
d2_new2 <- pdata.frame(d2_new1, index = c("state","year"))
pdim(d2_new2)


# Main regression
res7 <- plm(I(agcultinc1_real/1000) ~ lnsdp + YEAR:treat1 +
              otrev_sh + ontrev_sh + factor(year), 
            data = d2_new2, model = "within")

res7_c <- coeftest(res7, function(x) vcovHC(x, type = 'sss'))
res7_o <- nobs(res7)

# See result on screen
stargazer(
  res7_c,
  keep = c("YEAR:treat1"),
  type = "text"
)

# Number of observations
(res7_o)



# --------------------------------------------------------------- #
# -------------------- Appendix Tables -------------------------- #

# --------- Total nominal and real incomes
d3 <- d2 %>%
  dplyr::select(
    year, state, agrvad0405_crore, total_wage_bill_crore_united,
    tot_agcultinc1_nom, agvad_real, wbill1_real, finc1_real, 
    agwrk, agcult
  ) %>%
  as.data.frame()

# Save
write.csv(d3, file = "Total-Income.csv")


# --------- Per person nominal and real incomes
d4 <- d2 %>%
  dplyr::select(
    year, state, agwage1_nom, agcultinc1_nom, 
    agwage1_real, agcultinc1_real
  ) %>%
  as.data.frame()

# Save
write.csv(d4, file = "Per-Person-Income.csv")

